{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c5561de2-6d93-47f8-9656-c1e7807ba0fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset sizes and lookup tables shared by the generator cells below\n",
    "NUM_ORDERS = 1_000_000  # Number of orders (transactions) to generate; change as required\n",
    "NUM_CUSTOMERS = 5000\n",
    "NUM_PRODUCTS = 30\n",
    "\n",
    "PAYMENT_METHODS = [\n",
    "    \"Credit Card\",\n",
    "    \"Debit Card\",\n",
    "    \"PayPal\",\n",
    "    \"Bank Transfer\",\n",
    "]\n",
    "\n",
    "SHIPPING_COMPANIES = [\n",
    "    \"FedEx\",\n",
    "    \"DHL\",\n",
    "    \"UPS\",\n",
    "    \"BlueDart\",\n",
    "    \"USPS\",\n",
    "]\n",
    "\n",
    "ORDER_STATUSES = [\n",
    "    \"Processing\",\n",
    "    \"Shipped\",\n",
    "    \"Delivered\",\n",
    "    \"Pending\",\n",
    "    \"In Transit\",\n",
    "    \"Cancelled\",\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f81fff4d-c396-4680-bc4a-5517bfab6a76",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000000\n"
     ]
    }
   ],
   "source": [
    "print(NUM_ORDERS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a211d4c4-1c10-4027-91ba-c9efef36ae84",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import random\n",
    "from datetime import datetime, timedelta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "014a7f30-5025-4b60-ad86-cdb92fe16b1a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "61e090ee-dd7f-445d-b486-4211dd9d644b\n"
     ]
    }
   ],
   "source": [
    "import uuid\n",
    "\n",
    "print(uuid.uuid4())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3c75ac2d-1ff7-42de-8597-db5ff7de41ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ! pip install faker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "67bd9f5b-a65e-4411-a034-a29d8cb59f87",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Keith Martin\n",
      "bbarr@example.org\n",
      "382 Fitzpatrick Corners Suite 917\n",
      "Veronicaberg\n",
      "Namibia\n"
     ]
    }
   ],
   "source": [
    "from faker import Faker\n",
    "\n",
    "fake = Faker()\n",
    "\n",
    "# Print one sample from each Faker provider, in the same order as before\n",
    "for provider in (fake.name, fake.email, fake.street_address, fake.city, fake.country):\n",
    "    print(provider())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "fda688d6-eb5e-4ff6-bed3-2397cc17c961",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Multi-layered logistical Graphical User Interface\n"
     ]
    }
   ],
   "source": [
    "print(fake.catch_phrase())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "fe67a2b1-d9ef-48f5-8084-64abd85ce487",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generated 30 products and saved to ecommerce_data/products.json.\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "\n",
    "products_json = '''[\n",
    "    {\"product_id\": 1, \"name\": \"Wireless Noise-Canceling Headphones\", \"category\": \"Electronics\", \"price\": 249.99, \"quantity\": 50},\n",
    "    {\"product_id\": 2, \"name\": \"4K Ultra HD Smart TV\", \"category\": \"Electronics\", \"price\": 899.99, \"quantity\": 30},\n",
    "    {\"product_id\": 3, \"name\": \"Bluetooth Portable Speaker\", \"category\": \"Electronics\", \"price\": 129.99, \"quantity\": 75},\n",
    "    {\"product_id\": 4, \"name\": \"Gaming Laptop\", \"category\": \"Electronics\", \"price\": 1499.99, \"quantity\": 20},\n",
    "    {\"product_id\": 5, \"name\": \"Smartphone 5G\", \"category\": \"Electronics\", \"price\": 799.99, \"quantity\": 60},\n",
    "    {\"product_id\": 6, \"name\": \"Mechanical Gaming Keyboard\", \"category\": \"Accessories\", \"price\": 99.99, \"quantity\": 100},\n",
    "    {\"product_id\": 7, \"name\": \"Ergonomic Wireless Mouse\", \"category\": \"Accessories\", \"price\": 39.99, \"quantity\": 150},\n",
    "    {\"product_id\": 8, \"name\": \"Smartwatch with Heart Rate Monitor\", \"category\": \"Accessories\", \"price\": 199.99, \"quantity\": 80},\n",
    "    {\"product_id\": 9, \"name\": \"USB-C Charging Dock\", \"category\": \"Accessories\", \"price\": 59.99, \"quantity\": 120},\n",
    "    {\"product_id\": 10, \"name\": \"Noise-Isolating Earbuds\", \"category\": \"Accessories\", \"price\": 79.99, \"quantity\": 90},\n",
    "    {\"product_id\": 11, \"name\": \"Men's Waterproof Jacket\", \"category\": \"Clothing\", \"price\": 89.99, \"quantity\": 70},\n",
    "    {\"product_id\": 12, \"name\": \"Cotton Crew Neck T-Shirt\", \"category\": \"Clothing\", \"price\": 19.99, \"quantity\": 200},\n",
    "    {\"product_id\": 13, \"name\": \"Slim Fit Denim Jeans\", \"category\": \"Clothing\", \"price\": 49.99, \"quantity\": 100},\n",
    "    {\"product_id\": 14, \"name\": \"Running Shoes\", \"category\": \"Clothing\", \"price\": 129.99, \"quantity\": 50},\n",
    "    {\"product_id\": 15, \"name\": \"Woolen Winter Gloves\", \"category\": \"Clothing\", \"price\": 24.99, \"quantity\": 150},\n",
    "    {\"product_id\": 16, \"name\": \"Stainless Steel Cookware Set\", \"category\": \"Home\", \"price\": 159.99, \"quantity\": 40},\n",
    "    {\"product_id\": 17, \"name\": \"Premium Memory Foam Pillow\", \"category\": \"Home\", \"price\": 49.99, \"quantity\": 100},\n",
    "    {\"product_id\": 18, \"name\": \"Ceramic Dinnerware Set\", \"category\": \"Home\", \"price\": 129.99, \"quantity\": 60},\n",
    "    {\"product_id\": 19, \"name\": \"High-Powered Vacuum Cleaner\", \"category\": \"Home\", \"price\": 199.99, \"quantity\": 30},\n",
    "    {\"product_id\": 20, \"name\": \"Adjustable Standing Desk\", \"category\": \"Home\", \"price\": 299.99, \"quantity\": 25},\n",
    "    {\"product_id\": 21, \"name\": \"Yoga Mat with Non-Slip Surface\", \"category\": \"Sports\", \"price\": 39.99, \"quantity\": 120},\n",
    "    {\"product_id\": 22, \"name\": \"Adjustable Dumbbell Set\", \"category\": \"Sports\", \"price\": 179.99, \"quantity\": 50},\n",
    "    {\"product_id\": 23, \"name\": \"Tennis Racket\", \"category\": \"Sports\", \"price\": 89.99, \"quantity\": 40},\n",
    "    {\"product_id\": 24, \"name\": \"Hydration Backpack for Runners\", \"category\": \"Sports\", \"price\": 59.99, \"quantity\": 70},\n",
    "    {\"product_id\": 25, \"name\": \"Mountain Bike\", \"category\": \"Sports\", \"price\": 599.99, \"quantity\": 15},\n",
    "    {\"product_id\": 26, \"name\": \"Professional DSLR Camera\", \"category\": \"Electronics\", \"price\": 1299.99, \"quantity\": 25},\n",
    "    {\"product_id\": 27, \"name\": \"Smart Home Security Camera\", \"category\": \"Electronics\", \"price\": 149.99, \"quantity\": 75},\n",
    "    {\"product_id\": 28, \"name\": \"Noise-Canceling Office Headset\", \"category\": \"Accessories\", \"price\": 89.99, \"quantity\": 90},\n",
    "    {\"product_id\": 29, \"name\": \"Winter Puffer Jacket\", \"category\": \"Clothing\", \"price\": 129.99, \"quantity\": 40},\n",
    "    {\"product_id\": 30, \"name\": \"Home Theater Speaker System\", \"category\": \"Electronics\", \"price\": 499.99, \"quantity\": 30}\n",
    "]'''\n",
    "\n",
    "import os\n",
    "\n",
    "products = json.loads(products_json)\n",
    "\n",
    "# product_weights is drawn with size NUM_PRODUCTS and later paired with `products`\n",
    "# in random.choices, so the two lengths have to agree\n",
    "assert len(products) == NUM_PRODUCTS, \"NUM_PRODUCTS must match the number of products in the catalogue\"\n",
    "\n",
    "# Save to JSON file, creating the output directory if it does not exist yet\n",
    "output_file = \"ecommerce_data/products.json\"\n",
    "os.makedirs(os.path.dirname(output_file), exist_ok=True)\n",
    "with open(output_file, \"w\") as f:\n",
    "    json.dump(products, f, indent=4)\n",
    "\n",
    "# Report the number of records actually written\n",
    "print(f\"Generated {len(products)} products and saved to {output_file}.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "ca52811c-898d-431e-8f94-3a7e70d7bb82",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generated 5000 customers and saved to ecommerce_data/customers.json.\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import os\n",
    "import random\n",
    "from datetime import datetime, timedelta\n",
    "import uuid\n",
    "import faker\n",
    "\n",
    "# Initialize Faker through the module imported in this cell, so the cell does not\n",
    "# rely on the bare `Faker` name having been imported by an earlier cell\n",
    "fake = faker.Faker()\n",
    "\n",
    "# Generate customers: sequential ids starting at 1, with fake contact details\n",
    "customers = [\n",
    "    {\n",
    "        \"id\": i + 1,\n",
    "        \"name\": fake.name(),\n",
    "        \"email\": fake.email(),\n",
    "        \"address\": {\n",
    "            \"street\": fake.street_address(),\n",
    "            \"city\": fake.city(),\n",
    "            \"country\": fake.country(),\n",
    "        },\n",
    "    }\n",
    "    for i in range(NUM_CUSTOMERS)\n",
    "]\n",
    "\n",
    "# Save to JSON file, creating the output directory if it does not exist yet\n",
    "output_file = \"ecommerce_data/customers.json\"\n",
    "os.makedirs(os.path.dirname(output_file), exist_ok=True)\n",
    "with open(output_file, \"w\") as f:\n",
    "    json.dump(customers, f, indent=4)\n",
    "\n",
    "print(f\"Generated {NUM_CUSTOMERS} customers and saved to {output_file}.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "81411ce5-6a43-40c0-85bd-3c4c8b2a4e67",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4.49141363e-05, 1.88805772e-04, 2.15737268e-04, 1.62753830e-04,\n",
       "       1.67845631e-04, 2.48644406e-04, 1.02781599e-04, 2.09040600e-04,\n",
       "       2.22803616e-04, 1.96036910e-03, 7.61252808e-05, 2.16788695e-04,\n",
       "       1.10041288e-04, 1.81083570e-05, 6.79679805e-04, 2.52621609e-06,\n",
       "       2.03857318e-04, 1.22779840e-04, 1.73779284e-04, 6.38071904e-05,\n",
       "       2.06515019e-04, 3.53933735e-04, 5.20201619e-04, 6.84764117e-05,\n",
       "       3.80758381e-04])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "# One Pareto-distributed draw (shape parameter `a=3`) per customer, used as that\n",
    "# customer's relative likelihood of placing an order\n",
    "raw_customer_draws = np.random.pareto(a=3, size=NUM_CUSTOMERS)\n",
    "\n",
    "# Rescale the draws so the weights sum to 1\n",
    "customer_weights = raw_customer_draws / raw_customer_draws.sum()\n",
    "\n",
    "customer_weights[:25]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "fdc37253-486e-4ecf-a99a-55dd71e22ece",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.0877193 , 0.05263158, 0.05263158, 0.01754386, 0.01754386])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One Zipf-distributed draw (shape parameter `a=2`) per product, used as that\n",
    "# product's relative likelihood of being ordered (long-tail sales pattern)\n",
    "raw_product_draws = np.random.zipf(a=2, size=NUM_PRODUCTS)\n",
    "\n",
    "# Rescale the draws so the weights sum to 1\n",
    "product_weights = raw_product_draws / raw_product_draws.sum()\n",
    "\n",
    "product_weights[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "b02d674f-9269-4ed3-bfd8-01f6682738c1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Payment methods, listed in the order matched by the selection weights below.\n",
    "# (A dict that was assigned to this name and immediately overwritten has been removed:\n",
    "# it was never read and its entries did not match this list.)\n",
    "PAYMENT_METHODS = [\"Credit Card\", \"Debit Card\", \"PayPal\", \"Bank Transfer\"]\n",
    "\n",
    "# Sanity check: the per-method weights passed to random.choices in generate_order sum to 1\n",
    "sum([0.5, 0.15, 0.3, 0.05])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "72b728e8-260d-43c3-91ce-203432dc9167",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5000"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(customers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "6f15976d-8fed-46d0-a5ef-9db3de2788b8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5000"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(customer_weights)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "2422a1a5-0285-4244-a89b-cfdae085066d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 4338,\n",
       " 'name': 'Connie Moran',\n",
       " 'email': 'ashley38@example.com',\n",
       " 'address': {'street': '37280 Mitchell Ford',\n",
       "  'city': 'Port Richard',\n",
       "  'country': 'Wallis and Futuna'}}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Select a customer based on dynamically generated weights\n",
    "customer = random.choices(customers, weights=customer_weights, k=1)[0]\n",
    "\n",
    "customer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "b946da6a-2299-4acb-99d2-673e024dfa06",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'product_id': 3,\n",
       "  'name': 'Bluetooth Portable Speaker',\n",
       "  'category': 'Electronics',\n",
       "  'price': 129.99,\n",
       "  'quantity': 75},\n",
       " {'product_id': 26,\n",
       "  'name': 'Professional DSLR Camera',\n",
       "  'category': 'Electronics',\n",
       "  'price': 1299.99,\n",
       "  'quantity': 25}]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Select products based on dynamically generated weights\n",
    "num_items = 2\n",
    "\n",
    "selected_products = random.choices(products, weights=product_weights, k=num_items)\n",
    "\n",
    "selected_products"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "cf377502-26b8-4e32-9ebe-cba491908db1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Processing', 'Shipped', 'Delivered', 'Pending', 'In Transit', 'Cancelled']"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ORDER_STATUSES\n",
    "#weights=[0.1, 0.2, 0.4, 0.2, 0.05, 0.05]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "632cf8cc-cc4b-4107-a047-6b81df086402",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum([0.1, 0.2, 0.445, 0.2, 0.05, 0.005])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "79d4ad38-bbf1-4371-85b6-4eece98dac7e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51d68f2e-e3fe-4802-b86e-553f82f411b5",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "3c7b28a1-96f0-4db5-ba02-06d6dbb59366",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Statuses for which an order gets a tracking id, a shipping company and a \"Shipped\" history entry\n",
    "SHIPPED_STATUSES = (\"Shipped\", \"Delivered\", \"In Transit\")\n",
    "\n",
    "# Format used for every timestamp in the order history\n",
    "TIMESTAMP_FORMAT = \"%Y-%m-%dT%H:%M:%S\"\n",
    "\n",
    "\n",
    "def _short_id():\n",
    "    \"\"\"Return a 10-character uppercase hexadecimal identifier taken from a random UUID.\"\"\"\n",
    "    # Slice the hex form: slicing str(uuid.uuid4()) would include the hyphen at index 8\n",
    "    return uuid.uuid4().hex[:10].upper()\n",
    "\n",
    "\n",
    "# Function to create a more realistic order\n",
    "def generate_order(order_id):\n",
    "    \"\"\"Build one synthetic order record.\n",
    "\n",
    "    Reads the notebook-level `customers`, `customer_weights`, `products`,\n",
    "    `product_weights`, `PAYMENT_METHODS`, `SHIPPING_COMPANIES` and `ORDER_STATUSES`.\n",
    "\n",
    "    Args:\n",
    "        order_id: Value appended to the \"ORD\" prefix to form the order id.\n",
    "\n",
    "    Returns:\n",
    "        dict with the keys \"order_id\", \"customer\", \"items\", \"payment\",\n",
    "        \"delivery\" and \"order_history\".\n",
    "    \"\"\"\n",
    "    customer = random.choices(customers, weights=customer_weights, k=1)[0]\n",
    "\n",
    "    # Number of items (higher probability for 2-3 items per order)\n",
    "    num_items = random.choices([1, 2, 3, 4], weights=[0.2, 0.4, 0.3, 0.1], k=1)[0]\n",
    "\n",
    "    # Weighted draw with replacement: the same product can appear on more than one line\n",
    "    selected_products = random.choices(products, weights=product_weights, k=num_items)\n",
    "\n",
    "    items = [\n",
    "        {\n",
    "            \"product_id\": product[\"product_id\"],\n",
    "            \"name\": product[\"name\"],\n",
    "            \"category\": product[\"category\"],\n",
    "            \"price\": product[\"price\"],\n",
    "            # Products priced at 1000 or more are limited to a quantity of 1\n",
    "            \"quantity\": random.randint(1, 3 if product[\"price\"] < 1000 else 1),\n",
    "        }\n",
    "        for product in selected_products\n",
    "    ]\n",
    "\n",
    "    total_price = sum(item[\"price\"] * item[\"quantity\"] for item in items)\n",
    "    discount = round(random.uniform(0, total_price * 0.15), 2)  # Up to 15% discount\n",
    "\n",
    "    payment = {\n",
    "        # Weights are listed in the same order as PAYMENT_METHODS\n",
    "        \"method\": random.choices(PAYMENT_METHODS, weights=[0.5, 0.15, 0.3, 0.05], k=1)[0],\n",
    "        \"transaction_id\": _short_id(),\n",
    "        \"discount_applied\": discount,\n",
    "    }\n",
    "\n",
    "    # Order placement time: 0-9 days before now, with larger offsets weighted more\n",
    "    # heavily; the hour is picked uniformly from a fixed set between 10:00 and 22:00.\n",
    "    # (No weekday or evening weighting is applied.)\n",
    "    days_offset = random.choices(range(10), weights=[1, 1, 2, 2, 3, 4, 5, 6, 7, 8], k=1)[0]\n",
    "    order_created = datetime.now() - timedelta(days=days_offset)\n",
    "    order_time = order_created.replace(hour=random.choice([10, 12, 14, 16, 18, 20, 22]), minute=random.randint(0, 59))\n",
    "\n",
    "    # Expected delivery is 2-7 days after the order time\n",
    "    expected_delivery_date = order_time + timedelta(days=random.randint(2, 7))\n",
    "    # Weights are listed in the same order as ORDER_STATUSES\n",
    "    delivery_status = random.choices(\n",
    "        ORDER_STATUSES, weights=[0.1, 0.2, 0.445, 0.2, 0.05, 0.005], k=1\n",
    "    )[0]\n",
    "    is_shipped = delivery_status in SHIPPED_STATUSES\n",
    "\n",
    "    delivery = {\n",
    "        \"status\": delivery_status,\n",
    "        \"tracking_id\": _short_id() if is_shipped else None,\n",
    "        \"shipping_company\": random.choice(SHIPPING_COMPANIES) if is_shipped else None,\n",
    "        \"expected_delivery_date\": expected_delivery_date.strftime(\"%Y-%m-%d\"),\n",
    "    }\n",
    "\n",
    "    # Order history: every order starts with a \"Processing\" entry at the order time\n",
    "    order_history = [{\"status\": \"Processing\", \"timestamp\": order_time.strftime(TIMESTAMP_FORMAT)}]\n",
    "\n",
    "    if is_shipped:\n",
    "        shipped_date = order_time + timedelta(days=random.randint(1, 3))\n",
    "        order_history.append({\"status\": \"Shipped\", \"timestamp\": shipped_date.strftime(TIMESTAMP_FORMAT)})\n",
    "\n",
    "        # Nested so that shipped_date is always bound when a delivery entry is added\n",
    "        if delivery_status == \"Delivered\":\n",
    "            delivered_date = shipped_date + timedelta(days=random.randint(1, 3))\n",
    "            order_history.append({\"status\": \"Delivered\", \"timestamp\": delivered_date.strftime(TIMESTAMP_FORMAT)})\n",
    "\n",
    "    return {\n",
    "        \"order_id\": f\"ORD{order_id}\",\n",
    "        \"customer\": customer,\n",
    "        \"items\": items,\n",
    "        \"payment\": payment,\n",
    "        \"delivery\": delivery,\n",
    "        \"order_history\": order_history,\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "b291c543-d48f-4bdd-bce7-25429a910132",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3min 24s, sys: 4.53 s, total: 3min 28s\n",
      "Wall time: 3min 28s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# Build the full order list, numbering orders from 1 to NUM_ORDERS\n",
    "orders = list(map(generate_order, range(1, NUM_ORDERS + 1)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "5d243bbf-3b16-4d25-84f9-ffec7ce605be",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generated 1000000 orders and saved to ecommerce_data/orders.json.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Save to JSON file, creating the output directory if it does not exist yet\n",
    "output_file = \"ecommerce_data/orders.json\"\n",
    "os.makedirs(os.path.dirname(output_file), exist_ok=True)\n",
    "with open(output_file, \"w\") as f:\n",
    "    json.dump(orders, f, indent=4)\n",
    "\n",
    "# Report the number of orders actually written, which can differ from NUM_ORDERS\n",
    "# if `orders` was generated before the constant was changed\n",
    "print(f\"Generated {len(orders)} orders and saved to {output_file}.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26c26fc9-d447-4ab3-8838-41c185be9836",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ee9bf37-7180-43d8-9355-ae245010feda",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a698d98-499c-41cb-8ce2-b109365b236c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16d33e8a-88d8-4756-bee5-0110d4151c1c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb8abe47-a0f2-43ce-9464-48d9d7f8fcc0",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d215a495-ad71-4a18-9232-a9138b60dd06",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "89dadf40-8c4e-4032-9470-4665c666237c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1242914b-d7cb-4aeb-9243-e2ead40ee2d0",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dda217ff-a387-4f8a-9ecc-40d7689cc8b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Function to create a random order\n",
    "# def generate_order(order_id):\n",
    "#     customer = random.choice(customers)\n",
    "#     num_items = random.randint(1, 5)\n",
    "#     items = [\n",
    "#         {\n",
    "#             \"product_id\": product[\"product_id\"],\n",
    "#             \"name\": product[\"name\"],\n",
    "#             \"category\": product[\"category\"],\n",
    "#             \"price\": product[\"price\"],\n",
    "#             \"quantity\": random.randint(1, 3),\n",
    "#         }\n",
    "#         for product in random.sample(products, num_items)\n",
    "#     ]\n",
    "    \n",
    "#     total_price = sum(item[\"price\"] * item[\"quantity\"] for item in items)\n",
    "#     discount = round(random.uniform(0, total_price * 0.1), 2)\n",
    "    \n",
    "#     payment = {\n",
    "#         \"method\": random.choice(PAYMENT_METHODS),\n",
    "#         \"transaction_id\": str(uuid.uuid4())[:10].upper(),\n",
    "#         \"discount_applied\": discount,\n",
    "#     }\n",
    "    \n",
    "#     expected_delivery_date = datetime.now() + timedelta(days=random.randint(2, 7))\n",
    "#     delivery_status = random.choice(ORDER_STATUSES)\n",
    "    \n",
    "#     delivery = {\n",
    "#         \"status\": delivery_status,\n",
    "#         \"tracking_id\": str(uuid.uuid4())[:10].upper() if delivery_status in [\"Shipped\", \"Delivered\", \"In Transit\"] else None,\n",
    "#         \"shipping_company\": random.choice(SHIPPING_COMPANIES) if delivery_status in [\"Shipped\", \"Delivered\", \"In Transit\"] else None,\n",
    "#         \"expected_delivery_date\": expected_delivery_date.strftime(\"%Y-%m-%d\"),\n",
    "#     }\n",
    "\n",
    "#     # Generate order history\n",
    "#     order_history = []\n",
    "#     order_created = datetime.now() - timedelta(days=random.randint(5, 10))\n",
    "#     order_history.append({\"status\": \"Processing\", \"timestamp\": order_created.strftime(\"%Y-%m-%dT%H:%M:%S\")})\n",
    "\n",
    "#     if delivery_status in [\"Shipped\", \"Delivered\", \"In Transit\"]:\n",
    "#         shipped_date = order_created + timedelta(days=random.randint(1, 3))\n",
    "#         order_history.append({\"status\": \"Shipped\", \"timestamp\": shipped_date.strftime(\"%Y-%m-%dT%H:%M:%S\")})\n",
    "\n",
    "#     if delivery_status == \"Delivered\":\n",
    "#         delivered_date = shipped_date + timedelta(days=random.randint(1, 3))\n",
    "#         order_history.append({\"status\": \"Delivered\", \"timestamp\": delivered_date.strftime(\"%Y-%m-%dT%H:%M:%S\")})\n",
    "\n",
    "#     return {\n",
    "#         \"order_id\": f\"ORD{order_id}\",\n",
    "#         \"customer\": customer,\n",
    "#         \"items\": items,\n",
    "#         \"payment\": payment,\n",
    "#         \"delivery\": delivery,\n",
    "#         \"order_history\": order_history,\n",
    "#     }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "e5aa37ac-ceef-4840-8fcf-5fc99c08c198",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 17 s, sys: 2.76 s, total: 19.8 s\n",
      "Wall time: 19.8 s\n"
     ]
    }
   ],
   "source": [
    "# %%time\n",
    "\n",
    "# # Generate orders\n",
    "# orders = [generate_order(i) for i in range(1, NUM_ORDERS + 1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "dc7f2559-e719-43f4-a202-21a86e8e7546",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generated 1000000 orders and saved to ecommerce_data/orders.json.\n"
     ]
    }
   ],
   "source": [
    "# # Save to JSON file\n",
    "# output_file = \"ecommerce_data/orders.json\"\n",
    "# with open(output_file, \"w\") as f:\n",
    "#     json.dump(orders, f, indent=4)\n",
    "\n",
    "# print(f\"Generated {NUM_ORDERS} orders and saved to {output_file}.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6654da67-a3f3-4a4e-bbf3-948117e0fe07",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
